# Extract the movie name, release date, score and poster image from the Maoyan Movies TOP100 board: https://maoyan.com/board/4
import urllib3
import pandas as pd
from pandas import DataFrame
import re
def findMYHtml(http=None, offset=0):
    """Fetch the Maoyan board page and return the raw response body.

    Args:
        http: Object exposing a urllib3-style ``request(method, url,
            fields=..., headers=...)`` method. When ``None``, a fresh
            ``urllib3.PoolManager`` is created for this call.
        offset: Value sent as the ``offset`` query field of the GET request.

    Returns:
        The ``data`` attribute of the response returned by ``http.request``.
    """
    # Build the pool lazily: a ``urllib3.PoolManager()`` default argument would
    # be evaluated once at definition time, creating a pool on import and
    # sharing that single instance between all calls.
    if http is None:
        http = urllib3.PoolManager()

    # A browser-like User-Agent is sent with the request.
    headers = {
        'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36"
    }
    response = http.request(
        'GET',
        'https://maoyan.com/board/4',
        fields={'offset': offset},
        headers=headers,
    )
    return response.data
def mergeColum(ss):
    """Add a float ``grade`` entry built from the ``p1`` and ``p2`` entries.

    ``ss['p1']`` and ``ss['p2']`` are combined with ``+`` and the result is
    converted with ``float``; the value is stored under ``ss['grade']``.

    Args:
        ss: Mapping-like record (e.g. a DataFrame row) holding ``p1``/``p2``.

    Returns:
        The same ``ss`` object, modified in place.
    """
    combined = ss['p1'] + ss['p2']
    ss['grade'] = float(combined)
    return ss
if __name__ == "__main__":
    # Download the board page with an explicitly created connection pool.
    # NOTE(review): findMYHtml is called with its default offset (0), so only
    # that one page is fetched — confirm whether further offsets are needed.
    http = urllib3.PoolManager()
    data = findMYHtml(http)

    # Keep a raw copy of the downloaded bytes on disk.
    with open('E:/test.html', 'wb') as f:
        f.write(data)

    content = data.decode('utf-8')

    # Raw strings keep the regex escapes (\d, \.) from being treated as
    # invalid string escape sequences. The pieces concatenate to one pattern
    # with six capture groups, matching the six column names used below.
    pattern = re.compile(
        r'board-index.*?(\d+)</i>'
        r'.*?<img.*?data-src="(.*?)"'
        r'.*?class="name".*?title="(.*?)"'
        r'.*?releasetime.*?(\d{4}-\d{2}-\d{2})'
        r'.*?score.*?(\d{1,2}\.)'
        r'.*?fraction">(\d{1,2})<',
        re.S,  # let "." span newlines so one match can cross several lines
    )
    result = pattern.findall(content)

    df = DataFrame(
        result,
        columns=['level', 'pictureUrl', 'name', 'releaseDate', 'p1', 'p2'],
    )
    # Combine the two captured score parts into a float "grade" column, then
    # drop the intermediate columns.
    df = df.apply(mergeColum, axis=1)
    df = df.drop(columns=['p1', 'p2'])
    print(df)

    # NOTE(review): writing the legacy .xls format depends on an Excel writer
    # engine that newer pandas releases may no longer provide — confirm the
    # installed pandas version or consider an .xlsx target.
    df.to_excel('E:/123.xls')
